Visualization of WQ/NUT medians, trends, and the uncertainty in each

Published

June 4, 2024

In R/Analyses_for_paper/01b_medians_and_their_distns.qmd, we went a step further than calculating long-term medians. For each parameter, we used the compiled monthly medians (all months) to generate quantiles describing the monthly medians. So now, not only do we have a single long-term median for each parameter at each station, we also have min/max (WQ only); 5th, 10th, 25th, 50th, 75th, 90th, and 95th percentiles. For nutrients, robust regression on order statistics (ROS) was used to calculate these, and min/max were excluded but the others were generated by default. For WQ, the normal quantile R function was used to generate all of the listed quantiles.

In this file, those will be joined with descriptive characteristics - bioregion from SWMP clue, for ordering; and clusters from Carl and Dave’s work - and I’ll generate a figure for each parameter across all stations. In the first part, the parameters will be in the same order in every figure (bioregion – reserve – station), for ease of finding “your own” stations. In the second part, medians will ordered by magnitude and stations will not be labelled, for potential use in papers and/or supplementary information.

Data Import

Code
library(tidyverse)
library(patchwork)
library(khroma)
Code
medians_in <- here::here("Outputs",
                         "01_calculated_medians",
                         "WQ-NUT_overallMediansAndQuantiles.csv")
trends_in <- here::here("Outputs",
                        "02_calculated_long-term-trends",
                        "long-term-trends.csv")

trends_nut_in <- here::here("Outputs",
                        "02_calculated_long-term-trends",
                        "NUT_trends_back-transformed.csv")

# to get SWmP CLUE params, clusters, and other info
other_in <- here::here("Outputs",
                       "04_compiled_predictors",
                       "compiled_predictors_withExternalInfo.csv")
Code
meds <- read.csv(medians_in)
trnds <- read.csv(trends_in)
trnds_nut <- read.csv(trends_nut_in)
other <- read.csv(other_in)
Code
# this is the data frame that's been reduced to only stations included for final analysis, so will want to left join on this
other_trimmed <- other |> 
  select(-all_of(ends_with("_trend")),
         -all_of(ends_with("_median"))) |> 
  mutate(Ecoregion = case_when(Ecoregion == "Oregon, Washington, Vancouver Coast and Shelf" ~ "Oregon, Washington, Vancouver",
                               .default = Ecoregion))

key_stns <- other |> 
  select(reserve, station)
Code
trnds_wq <- trnds |> 
  filter(parameter %in% c("do_mgl_median",
                          "do_pct_median",
                          "sal_median",
                          "spcond_median",
                          "temp_median",
                          "turb_median")) |> 
  mutate(parameter = str_remove(parameter, "_median")) |> 
  select(station,
         param = parameter,
         trend = Slope,
         ciLow_trend = conf.low,
         ciHigh_trend = conf.high,
         p_trend = p.value,
         sig_seasonality)


trnds_wqnut <- bind_rows(trnds_wq, trnds_nut) |> 
  mutate(station = substr(station, 1, 5),
         param = case_when(param == "chla_n" ~ "chla",
                           .default = param))

trnds_and_meds <- left_join(trnds_wqnut,
                            meds,
                            by = c("station", "param"))

all <- left_join(key_stns,
                 trnds_and_meds,
                 by = "station") |> 
  left_join(other_trimmed,
            by = c("reserve", "station"))
Code
stn_factor <- other_trimmed |> 
  select(Ecoregion, reserve, station) |> 
  arrange(Ecoregion, reserve, station) |> 
  mutate(stn_factor = fct_inorder(station)) |> 
  select(station, stn_factor)

all <- left_join(all, stn_factor, by = "station")
Code
# manual shapes
shps <- c(18, 1, 2, 4, 10, 8, 9, 18, 1, 2, 4, 10, 8, 9)
# manual colors - don't vary with the same repetition as shapes,
# so every ecoregion will have a unique shape*color combination
# even though shapes and colors are not themselves unique
# pal <- c(color("muted")(9), color("muted")(4))
pal <- as.vector(color("muted")(9))
pal <- c(pal, pal)

About the graphs

Bars around points indicate:

  • left panel: 25th and 75th percentiles of monthly medians at the station
  • right panel: 95% confidence interval of the calculated trend

Vertical lines on plots indicate:

  • left panel: overall median
  • right panel: trend of 0 (no trend)

If you only look at one set of graphs, look at the first batch - organized within Ecoregion, colored by Ecoregion. I think these are the most appealing, but created other versions because I thought people would ask what they’d look like.

Stations ordered within Ecoregion

Stations are ordered by Ecoregion, followed by Reserve, then Station.

Colored by Ecoregion

WQ

Code
wq_parms <- c("do_mgl", "do_pct", "sal", "spcond", "temp", "turb")

for(i in seq_along(wq_parms)){
  parm = wq_parms[i]
  sub_dat <- filter(all, param == parm)
  
  
  p_meds <- ggplot(sub_dat,
                   aes(col = Ecoregion,
                       shape = Ecoregion)) +
    geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
               col = "gray40") +
    geom_pointrange(aes(y = stn_factor,
                        x = p_50,
                        xmin = p_25,
                        xmax = p_75)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "monthly median")
  
  p_trnds <- ggplot(sub_dat,
                    aes(col = Ecoregion,
                        shape = Ecoregion)) +
    geom_vline(xintercept = 0,
               col = "gray40") +
    geom_pointrange(aes(y = stn_factor,
                        x = trend,
                        xmin = ciLow_trend,
                        xmax = ciHigh_trend)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "trend (measurement units/yr)")
  
  
  p_out <- p_meds + p_trnds +
    plot_annotation(title = parm,
                    caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
    plot_layout(guides = 'collect',
                axes = 'collect') &
    theme_bw() &
    theme(legend.position = 'bottom',
          legend.title.position = 'top',
          legend.margin = margin(0, 0, 0, 0),
          legend.text = element_text(size = rel(0.7)),
          legend.key.spacing.y = unit(0.2, "pt"),
          axis.text.y = element_text(size = rel(0.9))) &
    guides(col = guide_legend(byrow = TRUE,
                              ncol = 3))
  
  
  print(p_out)
}

NUT

Code
nut_parms <- c("chla", "nh4f", "no23f", "po4f")

for(i in seq_along(nut_parms)){
  parm = nut_parms[i]
  sub_dat <- filter(all, param == parm)
  
  
  p_meds <- ggplot(sub_dat,
                   aes(col = Ecoregion,
                       shape = Ecoregion)) +
    geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
               col = "gray40") +
    geom_pointrange(aes(y = stn_factor,
                        x = p_50,
                        xmin = p_25,
                        xmax = p_75)) +
    scale_x_log10() +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "monthly median")
  
  p_trnds <- ggplot(sub_dat,
                    aes(col = Ecoregion,
                        shape = Ecoregion)) +
    geom_vline(xintercept = 0,
               col = "gray40") +
    geom_pointrange(aes(y = stn_factor,
                        x = trend_pctPerYear,
                        xmin = ciLow_pctPerYear,
                        xmax = ciHigh_pctPerYear)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "trend (% change/yr)")
  
  
  p_out <- p_meds + p_trnds +
    plot_annotation(title = parm,
                    caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
    plot_layout(guides = 'collect',
                axes = 'collect') &
    theme_bw() &
    theme(legend.position = 'bottom',
          legend.title.position = 'top',
          legend.margin = margin(0, 0, 0, 0),
          legend.text = element_text(size = rel(0.7)),
          legend.key.spacing.y = unit(0.2, "pt"),
          axis.text.y = element_text(size = rel(0.9))) &
    guides(col = guide_legend(byrow = TRUE,
                              ncol = 3))
  
  
  print(p_out)
}

Colored by cluster

WQ

Code
wq_parms <- c("do_mgl", "do_pct", "sal", "spcond", "temp", "turb")

for(i in seq_along(wq_parms)){
  parm = wq_parms[i]
  sub_dat <- filter(all, param == parm)
  
  
  p_meds <- ggplot(sub_dat,
                   aes(col = cluster,
                       shape = cluster)) +
    geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
               col = "gray40") +
    geom_pointrange(aes(y = stn_factor,
                        x = p_50,
                        xmin = p_25,
                        xmax = p_75)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "monthly median")
  
  p_trnds <- ggplot(sub_dat,
                    aes(col = cluster,
                        shape = cluster)) +
    geom_vline(xintercept = 0,
               col = "gray40") +
    geom_pointrange(aes(y = stn_factor,
                        x = trend,
                        xmin = ciLow_trend,
                        xmax = ciHigh_trend)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "trend (measurement units/yr)")
  
  
  p_out <- p_meds + p_trnds +
    plot_annotation(title = parm,
                    caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
    plot_layout(guides = 'collect',
                axes = 'collect') &
    theme_bw() &
    theme(legend.position = 'bottom',
          axis.text.y = element_text(size = rel(0.9)))
  
  
  print(p_out)
}

NUT

Code
nut_parms <- c("chla", "nh4f", "no23f", "po4f")

for(i in seq_along(nut_parms)){
  parm = nut_parms[i]
  sub_dat <- filter(all, param == parm)
  
  
  p_meds <- ggplot(sub_dat,
                   aes(col = cluster,
                       shape = cluster)) +
    geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
               col = "gray40") +
    geom_pointrange(aes(y = stn_factor,
                        x = p_50,
                        xmin = p_25,
                        xmax = p_75)) +
    scale_x_log10() +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "monthly median")
  
  p_trnds <- ggplot(sub_dat,
                    aes(col = cluster,
                        shape = cluster)) +
    geom_vline(xintercept = 0,
               col = "gray40") +
    geom_pointrange(aes(y = stn_factor,
                        x = trend_pctPerYear,
                        xmin = ciLow_pctPerYear,
                        xmax = ciHigh_pctPerYear)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "trend (% change/yr)")
  
  
  p_out <- p_meds + p_trnds +
    plot_annotation(title = parm,
                    caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
    plot_layout(guides = 'collect',
                axes = 'collect') &
    theme_bw() &
    theme(legend.position = 'bottom',
          axis.text.y = element_text(size = rel(0.9)))
  
  
  print(p_out)
}

Stations ordered alphabetically

Colored by Ecoregion

WQ

Code
wq_parms <- c("do_mgl", "do_pct", "sal", "spcond", "temp", "turb")

for(i in seq_along(wq_parms)){
  parm = wq_parms[i]
  sub_dat <- filter(all, param == parm)
  
  
  p_meds <- ggplot(sub_dat,
                   aes(col = Ecoregion,
                       shape = Ecoregion)) +
    geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
               col = "gray40") +
    geom_pointrange(aes(y = station,
                        x = p_50,
                        xmin = p_25,
                        xmax = p_75)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "monthly median")
  
  p_trnds <- ggplot(sub_dat,
                    aes(col = Ecoregion,
                        shape = Ecoregion)) +
    geom_vline(xintercept = 0,
               col = "gray40") +
    geom_pointrange(aes(y = station,
                        x = trend,
                        xmin = ciLow_trend,
                        xmax = ciHigh_trend)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "trend (measurement units/yr)")
  
  
  p_out <- p_meds + p_trnds +
    plot_annotation(title = parm,
                    caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
    plot_layout(guides = 'collect',
                axes = 'collect') &
    theme_bw() &
    theme(legend.position = 'bottom',
          legend.title.position = 'top',
          legend.margin = margin(0, 0, 0, 0),
          legend.text = element_text(size = rel(0.7)),
          legend.key.spacing.y = unit(0.2, "pt"),
          axis.text.y = element_text(size = rel(0.9))) &
    guides(col = guide_legend(byrow = TRUE,
                              ncol = 3))
  
  
  print(p_out)
}

NUT

Code
nut_parms <- c("chla", "nh4f", "no23f", "po4f")

for(i in seq_along(nut_parms)){
  parm = nut_parms[i]
  sub_dat <- filter(all, param == parm)
  
  
  p_meds <- ggplot(sub_dat,
                   aes(col = Ecoregion,
                       shape = Ecoregion)) +
    geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
               col = "gray40") +
    geom_pointrange(aes(y = station,
                        x = p_50,
                        xmin = p_25,
                        xmax = p_75)) +
    scale_x_log10() +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "monthly median")
  
  p_trnds <- ggplot(sub_dat,
                    aes(col = Ecoregion,
                        shape = Ecoregion)) +
    geom_vline(xintercept = 0,
               col = "gray40") +
    geom_pointrange(aes(y = station,
                        x = trend_pctPerYear,
                        xmin = ciLow_pctPerYear,
                        xmax = ciHigh_pctPerYear)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "trend (% change/yr)")
  
  
  p_out <- p_meds + p_trnds +
    plot_annotation(title = parm,
                    caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
    plot_layout(guides = 'collect',
                axes = 'collect') &
    theme_bw() &
    theme(legend.position = 'bottom',
          legend.title.position = 'top',
          legend.margin = margin(0, 0, 0, 0),
          legend.text = element_text(size = rel(0.7)),
          legend.key.spacing.y = unit(0.2, "pt"),
          axis.text.y = element_text(size = rel(0.9))) &
    guides(col = guide_legend(byrow = TRUE,
                              ncol = 3))
  
  
  print(p_out)
}

Colored by cluster

From Dave and Carl’s clustering

WQ

Code
wq_parms <- c("do_mgl", "do_pct", "sal", "spcond", "temp", "turb")

for(i in seq_along(wq_parms)){
  parm = wq_parms[i]
  sub_dat <- filter(all, param == parm)
  
  
  p_meds <- ggplot(sub_dat,
                   aes(col = cluster,
                       shape = cluster)) +
    geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
               col = "gray40") +
    geom_pointrange(aes(y = station,
                        x = p_50,
                        xmin = p_25,
                        xmax = p_75)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "monthly median")
  
  p_trnds <- ggplot(sub_dat,
                    aes(col = cluster,
                        shape = cluster)) +
    geom_vline(xintercept = 0,
               col = "gray40") +
    geom_pointrange(aes(y = station,
                        x = trend,
                        xmin = ciLow_trend,
                        xmax = ciHigh_trend)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "trend (measurement units/yr)")
  
  
  p_out <- p_meds + p_trnds +
    plot_annotation(title = parm,
                    caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
    plot_layout(guides = 'collect',
                axes = 'collect') &
    theme_bw() &
    theme(legend.position = 'bottom',
          axis.text.y = element_text(size = rel(0.9)))
  
  
  print(p_out)
}

NUT

Code
nut_parms <- c("chla", "nh4f", "no23f", "po4f")

for(i in seq_along(nut_parms)){
  parm = nut_parms[i]
  sub_dat <- filter(all, param == parm)
  
  
  p_meds <- ggplot(sub_dat,
                   aes(col = cluster,
                       shape = cluster)) +
    geom_vline(xintercept = median(sub_dat$p_50, na.rm = TRUE),
               col = "gray40") +
    geom_pointrange(aes(y = station,
                        x = p_50,
                        xmin = p_25,
                        xmax = p_75)) +
    scale_x_log10() +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "monthly median")
  
  p_trnds <- ggplot(sub_dat,
                    aes(col = cluster,
                        shape = cluster)) +
    geom_vline(xintercept = 0,
               col = "gray40") +
    geom_pointrange(aes(y = station,
                        x = trend_pctPerYear,
                        xmin = ciLow_pctPerYear,
                        xmax = ciHigh_pctPerYear)) +
    scale_color_manual(values = pal) +
    scale_shape_manual(values = shps) +
    labs(y = NULL,
         x = "trend (% change/yr)")
  
  
  p_out <- p_meds + p_trnds +
    plot_annotation(title = parm,
                    caption = "bars around monthly median represent the 25th-75th percentiles of monthly \nmedians at that station; bars for trend are 95% confidence interval.\nvertical lines represent overall median across all stations (left); zero trend (right)") +
    plot_layout(guides = 'collect',
                axes = 'collect') &
    theme_bw() &
    theme(legend.position = 'bottom',
          axis.text.y = element_text(size = rel(0.9)))
  
  
  print(p_out)
}